options ps=MAX ls=MAX nodate nonumber nocenter;
*options nonotes nosource nosource2 errors=0;



* CHANGING THE WORK DIRECTORY; 
libname user "D:\Olivier\Documents\Sas"; *USER FOLDER;
libname my_files "D:\Olivier\Owncloud\Research\Work_Segregation\Coin_Shared_Programs\Example_dta"; *ORIGIN FOLDER;
libname rec "D:\Olivier\Documents\Sas\Output\"; *OUTPUT FOLDER;
libname est_comp "D:\Olivier\Documents\Sas\Temp\"; *Temporary folder for storing establishment files further used in regressions;


* My coding of aggregate sector; 
* BE: Mining, Manufacturing and Energy;
* F: Construction;
* A&G: A:Agriculture & G:Trade;
* H: Transportation;
* I: Accomodation;
* J & : J. Info. & Comm & R: Leisure;
* K: Finance;
* LM: Real Estate and professional services;
* N: Support services;
* OPQ: State, teaching and health; 
* STU: Other;


proc format; 

	
	VALUE rk
	0.00-0.25="1.F0025"
	0.25-0.75="2.F2575"
	0.75-0.90="3.F7590"
	0.90-0.99="4.F9099" 
	0.99-1.00="5.F9910";

	VALUE decile
	0.0-0.1="d0"
	0.1-0.2="d1"
	0.2-0.3="d2"
	0.3-0.4="d3"
	0.4-0.5="d4"
	0.5-0.6="d5"
	0.6-0.7="d6"
	0.7-0.8="d7"
	0.8-0.9="d8"
	0.9-1.0="d9";

	VALUE age
	00-30="age15-30"
	31-40="age31-40"
	41-55="age41-55"
	56-199="age>=56" ;

	value $orgsizeb
	"00","01","02","03"="size_0-19"
	"04"="size_20-49"
	"05"="size_50-99"
	"06"="size_100-199"
	"07"="size_200-499" 
	"08"="size_500-999" 
	"09","10","11"="size_>1000" 
	;

	value orgsize
	0="orgsize0"
	1="orgsize1"
	2-20="orgsize2"
	21-50="orgsize20"
	51-100="orgsize50"
	101-200="orgsize100"
	201-500="orgsize200"
	501-1000="orgsize500" 
	1001-2500="orgsize1000" 
	2501-5000="orgsize2500" 
	5001-10000="orgsize5000" 
	10001-10000000="orgsize10000_plus"
	;
run;

proc contents data=my_files.example_2010; run;

%macro b(year);
	%let b=b&year;
	data &b;
		set 
		my_files.example_&year (keep=KEY_B KEY_L A11 A12 A13 B21 B22 B23 B25 B26 B27 B42 B52 NACE B421  B422  B28 AGE
			rename=
		(
		b42=wage 
		/*b32=nbhours
		b26=seniority*/

		/*b52=weight*/ /* if no weight variable then do : weight=1;*/

		key_b=firm 
		key_l=est 
		/*a12=orgsize*/ /*use this variable only if establishment sample is not exhaustive. Otherwise, it is 
			better to construct orgsize with the data*/

		a13=ltown /*area (preferentially town) of residence*/
		a11=wtown /*area (preferentially town) of work*/
		b421=nuts3 
		a12=nuts1 


		/*b23=occ
		b25=education*/
		b422=sector
		nace=sector_agg /*Could be created from sector*/
		age=age
		/*b27=full_part_time*/
		
		b21=gender
		b28=migration) )
 
		;

		year=&year;
		*********************************************************;

	*!!!!!!!!!!!! ADAPT WITH YOUR CATEGORIES !!!!!!!!!!!!!!!*;
	weight=1 ; 
	included=(wage>threshold and wage NE . and weight>0 );
	*included=(wage>threshold and wage NE . and weight>0 and seniority>=1);
	* variable redefinition;
	/*occ1=("0"<=occ<"30")*1;*Managers & professionals;
	occ2=("30"<=occ<"40")*1;*Technicians and other intediary jobs;
	occ3=("40"<=occ<"100")*1;*Blue collar & clerks;

	if occ1=1 then occ_g=1;
	else if occ3=1 then occ_g=3;
	else occ_g=2;*/

	age1=(AGE<=30);
	age2=(31<AGE<=40);
	age3=(41<AGE<=55);
	age4=(55<AGE);

	 /* * Alternative coding for another dataset;
        age1=(age = "10-19   " or  age="20-29   " );
	age2=(age="30-39   " );
	age3=(age="40-49   " );
	age4=(age="50-59   " or age="60+     ");*/

	if age1=1 then age_g=1;
	else if age2=1 then age_g=2;
	else if age3=1 then age_g=3;
	else if age4=1 then age_g=4;

	/*educ1=(education=0 or education=1);
	educ2=(education=2 or education=3);
	educ3=(education=4 or education=5);
	educ4=(education=6);

	if educ1=1 then educ_g=1;
	else if educ2=1 then educ_g=2;
	else if educ3=1 then educ_g=3;
	else if educ4=1 then educ_g=4;*/

	if missing(gender)=0 then female=(gender="F");
	male=1-female;

	if missing(migration)=0 then migrant=(migration="PT");
	nonmigrant=1-migrant;

	if missing(sector)=0 then fin_service=(sector="X64" | sector="X65" | sector="X66");
	if missing(sector)=0 then manufacturing=("X04"<sector<"X40");

	if missing(wtown)=0 then financial_center=(wtown="ITF");
*********************************************************;


	*Non wage earners exclusion;
	if wage>0;
	lnwage=log(wage);
	Random=RANUNI(124567);

	
	myid=_N_;

	drop migration gender /*occ seniority education threshold*/;
	run;
	
		%macro analysis(unit);
			proc sql; 
			create table &b.1 as select *, 
			SUM ((included=1)) as &unit._nb,
			SUM (weight*(included=1)) as &unit._weight
			from &b group by &unit.;
			quit;

			proc means data=&b.1; 
			Title "&year. &unit Selection Descriptives";
			var wage;
			class included &unit._weight ;
			types included &unit._weight included*&unit._weight;
			format &unit._weight orgsize.;
			output out=rec.&b.&unit.des;
			weight weight;
			run;

			data rec.&b.&unit.des; set rec.&b.&unit.des;
				year=&year.;
			run;

			proc sort data=&b.1 ; 
				by wage
				random; 
			run;

			data  &b.2; set &b.1 (where=(included=1 and &unit._nb>1 and missing(&unit)=0 )) 
									end=last; 
				sumweight + weight ;
				if last then call symput ('SUM', trim(left(put(sumweight, best.))));
			run;

			proc datasets LIBRARY=user; 
				DELETE &b.1;
			QUIT;

			proc sql; 
				create table &b.3 as select *, 
				lnwage-MEAN(lnwage) as within_lnwage,
				sum((weight)) as countuny,
				sum(weight*(PUT((sumweight/&SUM),rk.)="1.F0025")) as F0025uny,
				sum(weight*(PUT((sumweight/&SUM),rk.)="2.F2575")) as F2575uny, 
				sum(weight*(PUT((sumweight/&SUM),rk.)="3.F7590")) as F7590uny, 
				sum(weight*(PUT((sumweight/&SUM),rk.)="4.F9099")) as F9099uny, 
				sum(weight*(PUT((sumweight/&SUM),rk.)="5.F9910")) as F9910uny,
				sum(weight*(PUT((sumweight/&SUM),decile.)="d0")) as d0uny,
				sum(weight*(PUT((sumweight/&SUM),decile.)="d1")) as d1uny, 
				sum(weight*(PUT((sumweight/&SUM),decile.)="d2")) as d2uny, 
				sum(weight*(PUT((sumweight/&SUM),decile.)="d3")) as d3uny, 
				sum(weight*(PUT((sumweight/&SUM),decile.)="d4")) as d4uny,
				sum(weight*(PUT((sumweight/&SUM),decile.)="d5")) as d5uny,
				sum(weight*(PUT((sumweight/&SUM),decile.)="d6")) as d6uny, 
				sum(weight*(PUT((sumweight/&SUM),decile.)="d7")) as d7uny, 
				sum(weight*(PUT((sumweight/&SUM),decile.)="d8")) as d8uny, 
				sum(weight*(PUT((sumweight/&SUM),decile.)="d9")) as d9uny,
				sum(weight*(female=1)) as femaleuny,
				sum(weight*(female=0)) as maleuny, 
				/*sum(weight*(occ1=1)) as occ1uny, 
				sum(weight*(occ2=1)) as occ2uny, 
				sum(weight*(occ3=1)) as occ3uny,*/
				sum(weight*(migrant=1)) as migrantuny,
				sum(weight*(nonmigrant=1)) as nonmigrantuny,
				/*sum(weight*(educ1=1)) as educ1uny,
				sum(weight*(educ2=1)) as educ2uny,
				sum(weight*(educ3=1)) as educ3uny,
				sum(weight*(educ4=1)) as educ4uny,*/
				sum(weight*(age1=1)) as age1uny,
				sum(weight*(age2=1)) as age2uny,
				sum(weight*(age3=1)) as age3uny,
				sum(weight*(age4=1)) as age4uny
				from &b.2 (where=( wage NE .)) group by &unit.; 
			quit;

			
			%if &unit=est %then %do; 

				proc sql; 
					create table &b.est as select
					min(&unit.) as &unit.,
					min(&year.) as year,
					min(wtown) as wtown,
					min(nuts1) as nuts1,
					min(nuts3) as nuts3,
					min(sector) as sector,
					min(sector_agg) as sector_agg,
					sum(weight) as count,
					sum(weight*(PUT((sumweight/&SUM),rk.)="1.F0025")) as F0025,
					sum(weight*(PUT((sumweight/&SUM),rk.)="2.F2575")) as F2575, 
					sum(weight*(PUT((sumweight/&SUM),rk.)="3.F7590")) as F7590, 
					sum(weight*(PUT((sumweight/&SUM),rk.)="4.F9099")) as F9099, 
					sum(weight*(PUT((sumweight/&SUM),rk.)="5.F9910")) as F9910,
					sum(weight*(female=1)) as female,
					sum(weight*(female=0)) as male, 
					/*sum(weight*(occ1=1)) as occ1, 
					sum(weight*(occ2=1)) as occ2, 
					sum(weight*(occ3=1)) as occ3,*/
					sum(weight*(migrant=1)) as migrant,
					sum(weight*(nonmigrant=1)) as nonmigrant,
					/*sum(weight*(educ1=1)) as educ1,
					sum(weight*(educ2=1)) as educ2,
					sum(weight*(educ3=1)) as educ3,
					sum(weight*(educ4=1)) as educ4,*/
					sum(weight*(age1=1)) as age1,
					sum(weight*(age2=1)) as age2,
					sum(weight*(age3=1)) as age3,
					sum(weight*(age4=1)) as age4,
					mean(wage) as mwage,
					max(wage) as maxwage,
					std(wage) as sdwage,
					mean(log(wage)) as mlwage,
					std(log(wage)) as sdlwage,
					min(wage*(1+log(weight*(PUT((sumweight/&SUM),rk.)="1.F0025")))) as minwF0025,
					min(wage*(1+log(weight*(PUT((sumweight/&SUM),rk.)="2.F2575")))) as minwF2575, 
					min(wage*(1+log(weight*(PUT((sumweight/&SUM),rk.)="3.F7590")))) as minwF7590, 
					min(wage*(1+log(weight*(PUT((sumweight/&SUM),rk.)="4.F9099")))) as minwF9099, 
					min(wage*(1+log(weight*(PUT((sumweight/&SUM),rk.)="5.F9910")))) as minwF9910 /*,
					sum(weight*(PUT((sumweight/&SUM),decile.)="d0")) as d0,
					sum(weight*(PUT((sumweight/&SUM),decile.)="d1")) as d1, 
					sum(weight*(PUT((sumweight/&SUM),decile.)="d2")) as d2, 
					sum(weight*(PUT((sumweight/&SUM),decile.)="d3")) as d3, 
					sum(weight*(PUT((sumweight/&SUM),decile.)="d4")) as d4,
					sum(weight*(PUT((sumweight/&SUM),decile.)="d5")) as d5,
					sum(weight*(PUT((sumweight/&SUM),decile.)="d6")) as d6, 
					sum(weight*(PUT((sumweight/&SUM),decile.)="d7")) as d7, 
					sum(weight*(PUT((sumweight/&SUM),decile.)="d8")) as d8, 
					sum(weight*(PUT((sumweight/&SUM),decile.)="d9")) as d9,
					mean(log(wage*weight*(PUT((sumweight/&SUM),rk.)="1.F0025"))) as mlwF0025,
					mean(log(wage*weight*(PUT((sumweight/&SUM),rk.)="2.F2575"))) as mlwF2575, 
					mean(log(wage*weight*(PUT((sumweight/&SUM),rk.)="3.F7590"))) as mlwF7590, 
					mean(log(wage*weight*(PUT((sumweight/&SUM),rk.)="4.F9099"))) as mlwF9099, 
					mean(log(wage*weight*(PUT((sumweight/&SUM),rk.)="5.F9910"))) as mlwF9910,
					mean(log(wage*weight*(female=1))) as mlwfemale,
					mean(log(wage*weight*(female=0))) as mlwmale, 
					mean(log(wage*weight*(occ1=1))) as mlwocc1, 
					mean(log(wage*weight*(occ2=1))) as mlwocc2, 
					mean(log(wage*weight*(occ3=1))) as mlwocc3,
					mean(log(wage*weight*(migrant=1))) as mlwmigrant,
					mean(log(wage*weight*(nonmigrant=1))) as mlwnonmigrant,		
					std(log(wage*weight*(PUT((sumweight/&SUM),rk.)="1.F0025"))) as sdlwF0025,
					std(log(wage*weight*(PUT((sumweight/&SUM),rk.)="2.F2575"))) as sdlwF2575, 
					std(log(wage*weight*(PUT((sumweight/&SUM),rk.)="3.F7590"))) as sdlwF7590, 
					std(log(wage*weight*(PUT((sumweight/&SUM),rk.)="4.F9099"))) as sdlwF9099, 
					std(log(wage*weight*(PUT((sumweight/&SUM),rk.)="5.F9910"))) as sdlwF9910,
					std(log(wage*weight*(female=1))) as sdlwfemale,
					std(log(wage*weight*(female=0))) as sdlwmale, 
					std(log(wage*weight*(occ1=1))) as sdlwocc1, 
					std(log(wage*weight*(occ2=1))) as sdlwocc2, 
					std(log(wage*weight*(occ3=1))) as sdlwocc3,
					std(log(wage*weight*(migrant=1))) as sdlwmigrant,
					std(log(wage*weight*(nonmigrant=1))) as sdlwnonmigrant*/
					from &b.2 (where=( wage NE .)) group by &unit.;

				 create table est_comp.&b.est (drop=minwF0025 minwF2575 minwF7590 minwF9099 minwF9910)
						as select *, 
						min (minwF0025) as  minawF0025,
						min (minwF2575) as  minawF2575,
						min (minwF7590) as  minawF7590,
						min (minwF9099) as  minawF9099,
						min (minwF9910) as  minawF9910
						from &b.est
						; 
				quit;

				proc datasets lib=user;
					delete &b.est;
				quit;

			%end;
			
			proc datasets LIBRARY=user; 
				DELETE &b.2;
			QUIT;

			data &b.4; set &b.3;
			rk=sumweight/&SUM;

			* rename age_g=age occ_g=occ /*educ_g=educ*/;

			F0025=(PUT(rk,rk.)="1.F0025");
			F2575=(PUT(rk,rk.)="2.F2575");
			F7590=(PUT(rk,rk.)="3.F7590");
			F9099=(PUT(rk,rk.)="4.F9099");
			F9910=(PUT(rk,rk.)="5.F9910");

			d0=(PUT(rk,decile.)="d0");
			d1=(PUT(rk,decile.)="d1");
			d2=(PUT(rk,decile.)="d2");
			d3=(PUT(rk,decile.)="d3");
			d4=(PUT(rk,decile.)="d4");
			d5=(PUT(rk,decile.)="d5");
			d6=(PUT(rk,decile.)="d6");
			d7=(PUT(rk,decile.)="d7");
			d8=(PUT(rk,decile.)="d8");
			d9=(PUT(rk,decile.)="d9");

			if countuny>weight then do; 

				xF0025=(F0025uny-F0025*weight)/(countuny-weight);
				xF2575=(F2575uny-F2575*weight)/(countuny-weight);
				xF7590=(F7590uny-F7590*weight)/(countuny-weight);
				xF9099=(F9099uny-F9099*weight)/(countuny-weight);
				xF9910=(F9910uny-F9910*weight)/(countuny-weight);

				xd0=(d0uny-d0*weight)/(countuny-weight);
				xd1=(d1uny-d1*weight)/(countuny-weight);
				xd2=(d2uny-d2*weight)/(countuny-weight);
				xd3=(d3uny-d3*weight)/(countuny-weight);
				xd4=(d4uny-d4*weight)/(countuny-weight);
				xd5=(d5uny-d5*weight)/(countuny-weight);
				xd6=(d6uny-d6*weight)/(countuny-weight);
				xd7=(d7uny-d7*weight)/(countuny-weight);
				xd8=(d8uny-d8*weight)/(countuny-weight);
				xd9=(d9uny-d9*weight)/(countuny-weight);

				/*xeduc1=(educ1uny-educ1*weight)/(countuny-weight);
				xeduc2=(educ2uny-educ2*weight)/(countuny-weight);
				xeduc3=(educ3uny-educ3*weight)/(countuny-weight);
				xeduc4=(educ4uny-educ4*weight)/(countuny-weight);*/

				/*xocc1=(occ1uny-occ1*weight)/(countuny-weight);
				xocc2=(occ2uny-occ2*weight)/(countuny-weight);
				xocc3=(occ3uny-occ3*weight)/(countuny-weight);*/

				xage1=(age1uny-age1*weight)/(countuny-weight);
				xage2=(age2uny-age2*weight)/(countuny-weight);
				xage3=(age3uny-age3*weight)/(countuny-weight);
				xage4=(age4uny-age4*weight)/(countuny-weight);

				xfemale=(femaleuny-female*weight)/(countuny-weight);
				xmale=(maleuny-weight+female*weight)/(countuny-weight);

				xmigrant=(migrantuny-migrant*weight)/(countuny-weight);
				xnonmigrant=(nonmigrantuny-weight+migrant*weight)/(countuny-weight);

			end;


			all=1;
			if countuny>0 then w_unit=1/countuny;
			run;

			proc datasets LIBRARY=user; 
				DELETE &b.3;
			QUIT;

			proc means data= &b.4 ; 
			Title "&year. &unit Fractile Exposure per Fractile &SUM";
			var wage lnwage within_lnwage w_unit xf0025 xf2575 xf7590 xf9099 xf9910;

			%if &unit=est %then %do;
				class all rk /*orgsize*/ &unit._weight manufacturing fin_service financial_center sector_agg nuts1; 
				types all rk /*rk*orgsize*/ rk*&unit._weight rk*manufacturing rk*fin_service rk*financial_center rk*sector_agg rk*nuts1;
			%end;

			%else %if &unit=wtown %then %do;
				class all rk financial_center; 
				types all rk rk*financial_center;
			%end;

			%else %do;
			class all rk ; 
			format rk rk.;
			%end;

			format rk rk. &unit._weight orgsize. /* orgsize $orgsizeb.*/;
			output out=rec.&b.out&unit.inc;
			weight weight;
			where weight>0;
			run;

			data rec.&b.out&unit.inc; set rec.&b.out&unit.inc; 
			year=&year.;
			run;

			proc means data= &b.4 ; 
			Title "&year. &unit Decile Exposure per Fractile &SUM";
			var wage w_unit xd0-xd9;
			class all rk; 
			types all rk; 
			format rk decile. ;
			output out=rec.&b.out&unit.dinc;
			weight weight;
			where weight>0;
			run;

			data rec.&b.out&unit.dinc; set rec.&b.out&unit.dinc; 
			year=&year.;
			run;

			/*
			proc means data= &b.4 ; 
			Title "&year. &unit Education Exposure per Education";
			var wage w_unit xeduc1 xeduc2 xeduc3 xeduc4 ;
			class all educ; 
			types all educ; 
			output out=rec.&b.out&unit.educ; 
			weight weight;
			run;

			data rec.&b.out&unit.educ; set rec.&b.out&unit.educ; 
			year=&year.;
			run;*/


			/*proc means data= &b.4 ; 
			Title "&year. &unit Occupation Exposure per Occupation";
			var wage w_unit xocc1 xocc2 xocc3;
			class all occ; 
			types all occ; 
			output out=rec.&b.out&unit.occ; 
			weight weight;
			where weight>0;
			run;

			data rec.&b.out&unit.occ; set rec.&b.out&unit.occ; 
			year=&year.;
			run;*/

			proc means data= &b.4 ; 
			Title "&year. &unit Age Exposure per Age";
			var wage w_unit xage1 xage2 xage3 xage4 ;
			class all age;
			types all age;
			output out=rec.&b.out&unit.age;
			weight weight;
			where weight>0;
			run;

			data rec.&b.out&unit.age; set rec.&b.out&unit.age; 
			year=&year.;
			run;

			proc means data= &b.4 ; 
			Title "&year. &unit Gender Exposure per Gender";
			var wage w_unit xfemale xmale ;
			class all female; 
			types all female; 
			output out=rec.&b.out&unit.Sex; 
			weight weight;
			where weight>0;
			run;

			data rec.&b.out&unit.Sex; set rec.&b.out&unit.Sex; 
			year=&year.;
			run;

			proc means data= &b.4 ; 
			Title "&year. &unit migrant Exposure per Foreign Status";
			var wage w_unit xmigrant xnonmigrant ;
			class all migrant; 
			types all migrant; 
			output out=rec.&b.out&unit.mig; 
			weight weight;
			where weight>0;
			run;

			data rec.&b.out&unit.mig; set rec.&b.out&unit.mig; 
			year=&year.;
			run;

			proc datasets lib=user memtype=data /*noprint*/;
				delete &b.4;
			quit;

		%mend;

		%analysis(est);
		%analysis(firm);
		*%analysis(wtown);

		%if &year>1987 %then %analysis(ltown);
			proc datasets lib=user memtype=data /*noprint*/;
			delete &b &b.0 ;
			quit;
%mend;

**************************************************;
*!!!!!!!!!! ADAPT WITH YOUR YEARS!!!!!!!!!!!!!!!!;
*%b(1990); 
*%b(1991); 
*%b(1992); 
*%b(1993); 
*%b(1994); 
*%b(1995); 
*%b(1996); 
*%b(1997); 
*%b(1998); 
*%b(1999); 
*%b(2000); 
*%b(2001); 
*%b(2002); 
*%b(2003); 
*%b(2004); 
*%b(2005); 
*%b(2006); 
*%b(2007); 
*%b(2008); 
*%b(2009); 
%b(2010);
%b(2011); 
%b(2012); 
*%b(2013); 
**************************************************;


*!!!ADAPT WITH YOUR YEARS !;
%macro collect(key,description);
data &key; 
set 
/*
REC.B1990&key
REC.B1991&key
REC.B1992&key
REC.B1993&key
REC.B1994&key
REC.B1995&key
REC.B1996&key
REC.B1997&key
REC.B1998&key
REC.B1999&key
REC.B2000&key
REC.B2001&key
REC.B2002&key
REC.B2003&key
REC.B2004&key
REC.B2005&key
REC.B2006&key
REC.B2007&key
REC.B2008&key
REC.B2009&key*/
REC.B2010&key
REC.B2011&key
REC.B2012&key
/*REC.B2013&key*/
; 
length file $10.;
file="&key";
length description $100.;
description="&description";
run;
%mend;



%macro mcollect(key,des2);

%collect(outest&key , &des2 in establishments);
%collect(outfirm&key , &des2 in firms);
*%collect(outwtown&key , &des2 in working town);
%collect(outltown&key , &des2 in living town);

data rec.&key; set
outest&key 
/*outw_firm&key */
outfirm&key 
/*outwtown&key */
outltown&key ;
run;

%mend;

%mcollect(inc,Exposure income classes);
%mcollect(dinc,Exposure income classes);
*%mcollect(occ,Exposure Occupation);
*%mcollect(educ,Exposure Education);
%mcollect(age,Exposure Age);
%mcollect(sex,Exposure Gender);
%mcollect(mig,Exposure migrant);


*Descriptives; 
%collect(estdes, Descriptives on establishement);
%collect(firmdes, Descriptives on firms);
*%collect(wtowndes, Descriptives on town of work);
%collect(ltowndes, Descriptives on town of living);

data rec.des; set
estdes
firmdes
/*wtowndes*/
ltowndes;
run;


proc datasets lib=rec nolist nodetails;
  delete B:;
quit;

proc datasets lib=user nolist nodetails;
  delete ESTDES FIRMDES LTOWNDES OUT:;
quit;

